\documentclass[t,12pt,aspectratio=169]{beamer} % 16:9 宽屏比例，适合现代投影
\usepackage{ctex} % 中文支持
\usepackage{amsmath, amssymb} % 数学公式与符号
\usepackage{graphicx}
\usepackage{pythonhighlight}
\usepackage{url}
\usepackage{hyperref}
\usepackage{verbatim}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% 使表格美观
\usepackage{array}
\newcolumntype{M}[1]{>{\centering\arraybackslash}m{#1}}
\setlength\extrarowheight{3pt}

% 主题设置（推荐简洁风格）
\usetheme{Madrid}
\usecolortheme{default} % 可选：seahorse, beaver, dolphin 等

\title{应用回归分析第2章：一元线性回归}
\author{HXQ ET AL}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{document}

\begin{frame}
  \titlepage
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{第2章目录 }

\begin{enumerate}

\item[2.1.] 一元线性回归模型
\item[2.2.] 参数的估计
\item[2.3.] 最小二乘估计的性质
\item[2.4.] 回归方程的显著性检验
\item[2.5.] 残差分析
\item[2.6.] 回归系数的区间估计
\item[2.7.] 预测和控制

\end{enumerate}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.1.1. 例子2-1}

\begin{itemize}
\item  {\color{red}问题：保险公司为确定火灾保险价格，想要确定居民住宅区的火灾损失 $y$ 与住宅区到消防站的距离 $x$ 之间的关系。搜集到的已有数据如下。}

\begin{table}[ht!]\centering
\caption{火灾损失表}\vspace{0.1cm}
\begin{tabular}{ |M{2cm}|M{0.8cm}|M{0.8cm}|M{0.8cm}|M{0.8cm}|M{0.8cm}|M{0.8cm}|M{0.8cm}|M{0.8cm}|} \hline
距离 $x$ &3.4 &1.8 &4.6 &2.3 &3.1 &5.5 &0.7 &3.0 \\  \hline
损失 $y$ &26.2 &17.8 &31.3 &23.1 &27.5 &36.0 &14.1 &22.3 \\  \hline
距离 $x$ &2.6 &4.3 &2.1 &1.1 &6.1 &4.8 &3.8 & \\  \hline
损失 $y$ &19.6 &31.3 &24.0 &17.3 &43.2 &36.4 &26.1 & \\  \hline
\end{tabular}
\end{table}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.1.2. （距离，损失）散点图}

\begin{center}
\includegraphics[height=0.7\textheight, width=0.6\textwidth]{ex2-1-fire-insurance-scatter-graph.png}
\end{center}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{2.1.3. 画出散点图和回归线的R语言代码} 

{\color{blue}
\begin{verbatim}
> mydata<-read.table('ex2-1-fire-insurance.txt',header=T)
> mydata
> class(mydata)
> plot(mydata)
> abline(lm(y~x,data=mydata))
\end{verbatim}
}

\begin{itemize}
\item 第一行读入保存数据的文本文件，其中的数据第一列为距离 $x$, 第二列为损失 $y$, 中间用制表符 tab 隔开。表头为变量名 $x,y$. 
\item 第四行函数 \,{\color{blue}\verb+plot()+} 画出散点图。 
\item 第五行函数 \,{\color{blue}\verb+lm()+} 作出回归模型 $y\sim x$. 
\item 第五行函数 \,{\color{blue}\verb+abline()+} 在刚才的画图上添加回归直线。 
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.1.4. 一元线性回归模型的理论回归方程}

\begin{itemize}
\item {\color{red}问题：如何理解一元线性回归模型的{\color{red}理论回归方程}？ $$\boxed{y=\beta_0+\beta_1x+\varepsilon} $$ }

\item 解答：
\begin{itemize}
\item 由于 $x$ 的变化引起的 $y$ 的变化为 $\beta_0+\beta_1x$.
\item 由于其它随机因素引起的 $y$ 的变化为 $\varepsilon$, 称为随机误差。 
\item 随机误差 $\varepsilon$ 是一个随机变量，附加假设为
\[ \text{E}(\varepsilon)=0, \,\,\, \text{var}(\varepsilon)=\sigma^2. \] 

\item 自变量 $x$ 不是随机变量，因变量 $y$ 是随机变量。
\item 因变量 $y$ 的数学期望 $\text{E}(y\mid x) = \beta_0+\beta_1x$. 
\end{itemize}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.1.5. 一元线性回归模型的样本回归方程}

\begin{itemize}
\item {\color{red}问题：如何理解一元线性回归模型的样本回归方程}？ 
{\color{red}
\begin{equation*}
\boxed{
\begin{array}{rcl}
y_1 &=& \beta_0+\beta_1x_1+\varepsilon_1 \\
y_2 &=& \beta_0+\beta_1x_2+\varepsilon_2 \\
\cdots && \cdots \\
y_n &=& \beta_0+\beta_1x_n+\varepsilon_n
\end{array}
}
\end{equation*}
}

\item 解答：
\begin{itemize}
\item 这是 $n$ 组观测数据 $\{(x_i,y_i),\,\, 1\le i\le n\}$ 代入理论回归方程得到的。
\item 基本假设为误差项 $\varepsilon_1,\varepsilon_2,\cdots, \varepsilon_n$ 是均值为零，方差相等，且互不相关。
\end{itemize}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.1.6. 一元线性回归模型的经验回归方程}

\begin{itemize}
\item {\color{red}问题：如何理解一元线性回归模型的经验回归方程}？ 
 {\color{red} $$ \boxed{\hat{y}=\hat{\beta}_0+\hat{\beta}_1 x }$$}
 
\item 解答：
\begin{itemize}
\item $\hat{\beta}_0, \hat{\beta}_1$ 是模型参数 $\beta_0,\beta_1$  的估计，通过样本数据计算得到。%最简单的是最小二乘估计。
\item  $\hat{y}$ 是当自变量为 $x$ 时，因变量 $y$ 的回归值，也称拟合值。
\item  $\hat{\beta}_0$ 是经验回归直线在纵轴上的截距。
\item  $\hat{\beta}_1$ 是经验回归直线的斜率，表示自变量改变一个单位，因变量的改变量。
\end{itemize}

\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.1.7. 一元线性回归模型的矩阵形式}

\begin{itemize}
\item {\color{red}问题：如何理解一元线性回归模型的矩阵形式}？ 
{\color{red}
\begin{equation*}
\boxed{
{\bf y} = {\bf X}{\boldsymbol\beta}+{\boldsymbol\varepsilon}
}
\end{equation*}
}

\vspace{-0.5cm}

\item 解答：
\begin{itemize}
\item 看到矩阵，先要明白把矩阵写具体是什么样子。这里的话我们有
\begin{eqnarray*}
%{\bf 1}=\begin{bmatrix} 1 \\ 1 \\ \vdots \\ 1  \end{bmatrix},
%{\bf x}=\begin{bmatrix} x_{1} \\ x_{2} \\ \vdots \\ x_{n}  \end{bmatrix},
{\bf y}=\begin{bmatrix} y_{1} \\ y_{2} \\ \vdots \\ y_{n}  \end{bmatrix},
X = \begin{bmatrix} 1 & x_{1} \\ 1 & x_{2} \\ \vdots & \vdots \\ 1 & x_{n} \\ \end{bmatrix}, 
\boldsymbol\beta = \begin{bmatrix} \beta_0 \\ \beta_1 \end{bmatrix},
\boldsymbol\varepsilon = \begin{bmatrix} \varepsilon_{1} \\ \varepsilon_{2} \\ \vdots \\ \varepsilon_{n}  \end{bmatrix}.
\end{eqnarray*}

\item 对误差项的基本假设为 $\text{E}(\boldsymbol\varepsilon)={\bf 0}$, $\text{var}(\boldsymbol\varepsilon)=\sigma^2 I_n$, 这里 $I_n$ 是 $n$ 阶单位矩阵。

\end{itemize}
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.1.8. 误差项的正态分布假设}

\begin{itemize}
\item {\color{red}问题：我们经常假设误差项服从正态分布，且独立同分布，即
$$\varepsilon_i \overset{\text{iid}}{\sim} N(0,\sigma^2),\,\,\, 1\le i\le n. $$
这样做的好处是什么？}

\vspace{0.3cm}

\item 解答：
\begin{itemize}
\item 在参数和模型的检验的时候，可以构建熟悉的统计量。
\item 这样的话，因变量 $y$ 的分布也知道了，即 $y \sim N(\beta_0+\beta_1x, \sigma^2)$. 
\item 假设强一些，结论也会强一些。会给回归分析带来方便。
\end{itemize}
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.1.9. 思考题1}

\begin{itemize}
\item {\color{red}问题：一元线性回归模型的数学形式和基本假定分别是什么？}

\item 解答：


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.1.10. 思考题2}

\begin{itemize}
\item {\color{red}问题：理论回归方程和经验回归方程的区别有哪些？}

\item 解答：


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.2.1. 普通最小二乘法}

\begin{itemize}
\item {\color{red}问题：给定数据 $\{(x_i,y_i),\,\,1\le i\le n\}$, 要寻找 $\beta_0,\beta_1$, 使得下式取最小值
\[ Q(\beta_0,\beta_1) = \sum\limits_{i=1}^{n} [y_i-\beta_0-\beta_1x_i]^2. \] }

\item 注解：这个表达式是接下来课件2.2.8页的图上短线段的长度的平方和。

\item 定义：{\color{blue}参数的最小二乘估计}是指使上式最小的参数，即
\[ (\hat{\beta}_0,\hat{\beta}_1) = \underset{\beta_0,\beta_1}{\text{argmin}}\,\, Q(\beta_0,\beta_1). \]

\item 定义：称 $\hat{y}_i=\hat{\beta}_0+\hat{\beta}_1x_i$ 为 $y_i$ 的{\color{blue}回归值}或{\color{blue}拟合值}。

\item 定义：称 $e_i=y_i-\hat{y}_i$ 为 $y_i$ 的{\color{blue}残差}。

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.2.2. 普通最小二乘法的求解过程}

\begin{itemize}
\item 解答：将目标函数对参数求导，并令其为零，可得
\begin{eqnarray*}
\left\{\begin{array}{rcl}
\frac{\partial Q}{\partial \beta_0} &=&  \sum\limits_{i=1}^{n} 2[y_i-\beta_0-\beta_1x_i]\cdot (-1)=0, \\
\frac{\partial Q}{\partial \beta_1} &=&  \sum\limits_{i=1}^{n} 2[y_i-\beta_0-\beta_1x_i]\cdot (-x_i)=0.
\end{array}\right.
\end{eqnarray*}

观察这个方程组，引进下述记号，以简化这个方程组，
\begin{eqnarray*}
%\begin{gather*}
&{\color{red}\bar{x}}=\frac{1}{n}\sum\limits_{i=1}^{n}x_i, \hspace{0.3cm}
{\color{red}\bar{y}}=\frac{1}{n}\sum\limits_{i=1}^{n}y_i, \\
&{\color{red}L_{xx}}=\sum\limits_{i=1}^{n}(x_i-\bar{x})^2=\sum\limits_{i=1}^{n}x_i^2-n(\bar{x})^2, \hspace{0.3cm}
{\color{red}L_{yy}}=\sum\limits_{i=1}^{n}(y_i-\bar{y})^2=\sum\limits_{i=1}^{n}y_i^2-n(\bar{y})^2, \\
&{\color{red}L_{xy}}=\sum\limits_{i=1}^{n}(x_i-\bar{x})(y_i-\bar{y})=\sum\limits_{i=1}^{n}x_iy_i-n\bar{x}\bar{y}. 
%\end{gather*}
\end{eqnarray*}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.2.3.a. 普通最小二乘法的求解过程（续）}

\begin{itemize}
\item 解答（续）：上一页的方程组变成了
\begin{eqnarray*}
\left\{\begin{array}{rrrrr}
\bar{y} & - {\color{red}\beta_0} & - \bar{x} {\color{red}\beta_1}  & = &  0, \\
(L_{xy}+n\bar{x}\bar{y}) & - n\bar{x} {\color{red}\beta_0} & - (L_{xx}+n(\bar{x})^2) {\color{red}\beta_1} & = & 0. 
\end{array}\right.
\end{eqnarray*}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.2.3.b. 普通最小二乘法的求解过程（续）}

\begin{itemize}

\item 解答（续）：由克拉默公式，可得下述参数估计公式
\begin{eqnarray*}
\hat{\beta}_1 &=& \frac
{\begin{vmatrix}1&\bar{y} \\ n\bar{x}&L_{xy}+n\bar{x}\bar{y}\end{vmatrix}}
{\begin{vmatrix}1&\bar{x} \\ n\bar{x}&L_{xx}+n(\bar{x})^2 \end{vmatrix}}
=\frac{L_{xy}}{L_{xx}}, \\
\hat{\beta}_0 &=& \frac
{\begin{vmatrix}\bar{y}&\bar{x} \\ L_{xy}+n\bar{x}\bar{y}&L_{xx}+n(\bar{x})^2\end{vmatrix}}
{\begin{vmatrix}1&\bar{x} \\ n\bar{x}&L_{xx}+n(\bar{x})^2 \end{vmatrix}}
=\frac{\bar{y}L_{xx} - \bar{x}L_{xy}}{L_{xx}}.
\end{eqnarray*}

%\item 代入第一个方程，得到
%\begin{eqnarray*}
%\hat{\beta}_0 = \bar{y}-\bar{x}\hat{\beta}_1 = \bar{y}-\bar{x}\frac{L_{xx}}{L_{xy}}.
%\end{eqnarray*}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{2.2.4. 对例2-1的数据计算回归直线} 

\begin{itemize}
\item {\color{red}问题：求出例2-1中，火灾损失 $y$ 对房屋与消防站的距离 $x$ 的回归方程。}

\item 解答：使用R语言中的 \,{\color{blue}\verb+lm()+}函数，结果为
{\color{blue} $$\boxed{\hat{y} = 10.278 + 4.919 x}.$$}

\item 注解：下述代码中，\,{\color{blue}\verb+attach()+} 函数将数据框\,{\color{blue}\verb+mydata+} 里的变量\,{\color{blue}\verb+x,y+} 直接放到工作空间，这样下一页的代码里可以直接使用。\,{\color{blue}\verb+summary()+}函数查看回归结果，从中可以读出两个系数的估计值。

{\color{blue}
\begin{verbatim}
> mydata <- read.table('ex2-1-fire-insurance.txt',header=T)
> attach(mydata)
> lm01 <- lm(y~x,data=mydata)
> summary(lm01)
\end{verbatim}
}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{2.2.5. 对例2-1的数据计算回归直线（续）} 

\begin{itemize}
\item 使用参数估计公式，直接进行计算。

{\color{blue}
\begin{verbatim}
> xbar <- mean(x)  
> ybar <- mean(y)  
> Lxx <- sum((x-xbar)^2)
> Lxy <- sum((x-xbar)*(y-ybar))
> beta0hat <- ybar-xbar*Lxy/Lxx
> beta1hat <- Lxy/Lxx
\end{verbatim}
}

\item 注解：R语言跟 Matlab 的一个差异是，两个向量的按分量运算，R语言不需要加点。
在上述计算中，变量\,{\color{blue}\verb+x,y+} 是两个向量，其余都是数量。

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{2.2.6. R语言的回归结果} 

\begin{itemize}
\item {\color{red}问题：使用 \,{\verb+summary()+}函数的结果如下，请解释。}
{\footnotesize\color{blue}
\begin{verbatim}
> summary(lm01)
Call: lm(formula = y ~ x, data = mydata)
Residuals:
    Min      1Q  Median      3Q     Max 
-3.4682 -1.4705 -0.1311  1.7915  3.3915 
Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  10.2779     1.4203   7.237 6.59e-06 ***
x             4.9193     0.3927  12.525 1.25e-08 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 2.316 on 13 degrees of freedom
Multiple R-squared:  0.9235,	Adjusted R-squared:  0.9176 
F-statistic: 156.9 on 1 and 13 DF,  p-value: 1.248e-08
\end{verbatim}
}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.2.7. 回归结果的解释（续上一页）}

\begin{itemize}
\item Call: 把回归模型和数据重述一遍。
\item Residuals: 残差的五个四分位数。
\item Coefficients: 两个回归参数的估计值、标准误、$t$统计值、$p$值。
\item Intercept: 截距项。
\item Significance code: 显著性的记号，例如三颗星表示$p$值小于0.001.
\item Residual standard error: 误差项的标准差的估计值。
\item Multiple R-squared: $R^2$ 统计值。
\item F-statistic: $F$统计值。
\item DF: degrees of freedom, 自由度。
\item p-value: 模型的$p$值。
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.2.8. 例2-1的回归图像：回归值、回归线、残差}

\begin{center}
\includegraphics[height=0.7\textheight, width=0.6\textwidth]{ex2-1-fire-insurance-regression.png}
\end{center}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{2.2.9. 例2-1的回归图像的代码} 

\begin{itemize}
\item 代码：接着课件2.2.4页的代码。
{\color{blue}
\begin{verbatim}
> yhat <- lm01$fitted.values
> plot(x,y)
> abline(lm01)
> segments(x,y,x,yhat,col='red',lwd=3)
> points(x,yhat)
\end{verbatim}
}

\item 注解：\,{\color{blue}\verb+segments()+} 函数画出这些短线段。前面四个参数分别是起点的横坐标和纵坐标，和终点的横坐标和纵坐标，后面两个参数是指定颜色和线的宽度。\,{\color{blue}\verb+points()+} 函数是在刚才的图像上继续加点，参数分别是所有点的横坐标和纵坐标。

\item 注解：\,{\color{blue}\verb+lm01+} 是一个列表数据，保存回归模型的很多信息。\,{\color{blue}\verb+lm01$fitted.values+} 是提取了其中的回归值。

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.2.10. 最大似然估计法(*)}

\begin{itemize}
\item {\color{red}问题：设回归模型 $y=\beta_0+\beta_1x+\varepsilon$ 的误差项 $\varepsilon\sim N(0,\sigma^2)$. 
给定观测数据 $\{(x_i,y_i),1\le i\le n\}$, 用最大似然估计方法，计算参数 $\beta_0,\beta_1$ 的估计。}

\item 解答：
\begin{itemize}
\item 写出因变量服从的分布 $y_i\sim N(\beta_0+\beta_1x_i,\sigma^2)$, 写出其密度函数 $f_i(y_i)$.
\item 写出似然函数 $L(\beta_0,\beta_1,\sigma^2)=\prod f_i(y_i)$.
\item 将似然函数具体写出来，问题转化成了求最小二乘估计法里的目标函数的最小值。
\item 所谓最大似然估计方法，就是求参数的值，使得似然函数取到最大值。
\item 最大似然估计的原理是，既然这个样本被观测到了，那么这个样本发生的概率应该是在可能的范围内达到最大的。
\end{itemize}

%\vfill
%\item 注(*)：本节内容暂时不做要求。
\end{itemize}


\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.2.11. 思考题1}

\begin{itemize}
\item {\color{red}问题：普通最小二乘法能解决什么样的问题？解决思路是什么？}

\item 解答：


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{2.2.12. 思考题2} %若使用verbatim环境
%\begin{frame}{2.2.12. 思考题2}

\begin{itemize}
\item {\color{red}问题：使用R语言进行线性回归，我们经常用 \,{\color{blue}\verb+lm()+} 函数。这个函数是怎么使用的？如何解读这个函数的输出结果？}

\item 解答：


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.3.1. 参数的最小二乘估计是因变量样本的线性函数}

\begin{itemize}
\item {\color{red}问题：设有一元线性回归模型 $y=\beta_0+\beta_1x+\varepsilon$, 设有样本数据 $\{(x_i,y_i),\,\, 1\le i\le n\}$. 
证明参数 $\beta_0,\beta_1$  的最小二乘估计 $\hat{\beta}_0, \hat{\beta}_1$ 是因变量的样本数据 $\{y_i,\,\, 1\le i\le n\}$ 的线性函数。}

\item 解答：由上一节可知，参数 ${\beta}_1$ 的最小二乘估计 $\hat{\beta}_1$ 有如下表达式， 
\begin{eqnarray*}
\hat{\beta}_1=\frac{L_{xy}}{L_{xx}}= \frac{\sum\limits_{i=1}^{n}(x_i-\bar{x})(y_i-\bar{y})}{\sum\limits_{i=1}^{n}(x_i-\bar{x})^2}
= \frac{\sum\limits_{i=1}^{n}(x_i-\bar{x})\cdot y_i}{\sum\limits_{i=1}^{n}(x_i-\bar{x})^2}
-\frac{\sum\limits_{i=1}^{n}(x_i-\bar{x})\cdot \bar{y}}{\sum\limits_{i=1}^{n}(x_i-\bar{x})^2}.
\end{eqnarray*}
上式的第二项的分子可以提取公因式 $\bar{y}$, 然后因为 $\sum\limits_{i=1}^{n}(x_i-\bar{x})$ 等于零，所以第二项等于零。
这样就证明了 $\hat{\beta}_1$ 是 $\{y_i,\,\, 1\le i\le n\}$ 的线性组合，并且 $y_i$ 的组合系数是 $(x_i-\bar{x})/L_{xx}$. 

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.3.2. 参数的最小二乘估计是无偏估计}

\begin{itemize}
\item {\color{red}问题：如何理解参数的最小二乘估计是无偏估计？}

\item 解释题目：
\begin{itemize}
\item 理论回归模型 $y=\beta_0+\beta_1x+\varepsilon$ 中的参数 $\beta_0,\beta_1$ 是真值，但是未知的。
\item 因变量的样本数据 $\{y_i,\,\, 1\le i\le n\}$ 在观测前是随机变量，假设相互独立。
\item 参数的最小二乘估计 $\hat{\beta}_0, \hat{\beta}_1$ 是 $\{y_i,\,\, 1\le i\le n\}$ 的函数，故也是随机变量。
\item 所谓无偏估计，指的是 $\text{E}(\hat{\beta}_0)=\beta_0$ 和 $\text{E}(\hat{\beta}_1)=\beta_1$.
\end{itemize}

\item 具体验证：根据参数的最小二乘估计的公式，根据求数学期望的线性性质，再根据假设 $\text{E}(\varepsilon_i)=0$ 得 
$\text{E}(y_i)=\beta_0+\beta_1x_i$, 计算可证。第一步如下：
\begin{eqnarray*}
{\color{red}\text{E}}(\hat{\beta}_1)= \frac{\sum\limits_{i=1}^{n}(x_i-\bar{x})\cdot {\color{red}\text{E}}(y_i)}{\sum\limits_{i=1}^{n}(x_i-\bar{x})^2}.
\end{eqnarray*}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.3.3. 参数的最小二乘估计的方差}

\begin{itemize}
\item {\color{red}问题：参数的最小二乘估计 $\hat{\beta}_0, \hat{\beta}_1$ 在样本数据代入之前是随机变量。上一页我们已经知道它们的数学期望就是参数的真值本身。那么它们的方差是多少呢？}

\item 解答：根据假设，因变量样本数据 $\{y_i,\,\, 1\le i\le n\}$ 是相互独立的。由于 $\hat{\beta}_1$ 是它们的线性组合，所以  
$\hat{\beta}_1$ 的方差等于这个线性组合的每一项的方差的和。注意到方差的性质 $\text{var}(c_iy_i)=c_i^2\text{var}(y_i)$. 具体计算如下。
\begin{eqnarray*}
{\color{red}\text{var}}(\hat{\beta}_1)
= \frac{\sum\limits_{i=1}^{n}(x_i-\bar{x})^2\cdot {\color{red}\text{var}}(y_i)}{\left[\sum\limits_{i=1}^{n}(x_i-\bar{x})^2\right]^2}
= \frac{\sigma^2}{\sum\limits_{i=1}^{n}(x_i-\bar{x})^2}
=\frac{\sigma^2}{L_{xx}}.
\end{eqnarray*}


%\vfill

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.3.4. 一些注解}

\begin{itemize}
\item 注解：一个估计量的方差比较大的话，用这个估计量来估计参数的效果就不太好。这是研究估计量的方差的重要原因。

\item 参数 ${\beta}_0$ 的最小二乘估计量 $\hat{\beta}_0$ 的无偏性和方差，也类似计算。
\begin{eqnarray*}
\text{var}(\hat{\beta}_0)
= \left[ \frac{1}{n} + \frac{(\bar{x})^2}{\sum\limits_{i=1}^{n}(x_i-\bar{x})^2} \right]  \sigma^2
=\frac{L_{xx}+n(\bar{x})^2}{nL_{xx}}\sigma^2.
\end{eqnarray*}

\item 从 $\text{var}(\hat{\beta}_1)$ 和 $\text{var}(\hat{\beta}_0)$ 的表达式可以看出，为了使得估计的方差减小，自变量的取值应该分散一些（增大 $L_{xx}$），样本量应该大一些（增大 $n$）。




\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.3.5. 在误差项服从正态分布的假设下}

\begin{itemize}
\item {\color{red}问题：基本假设要求误差项 $\{\varepsilon_i,1\le i\le n\}$ 的均值为零，方差一致为 $\sigma^2$, 且两两不相关。如果假设增强为误差项服从正态分布 $N(0,\sigma^2)$, 那么参数的最小二乘估计量服从什么分布？}

\item 解答：
\begin{itemize}
\item 由误差项 $\{\varepsilon_i,1\le i\le n\}$ 服从独立正态分布，可得样本数据 $\{y_i,1\le i\le n\}$ 服从相互独立的正态分布。
于是它们的线性组合仍是正态分布。

\item 由本节关于参数估计量的均值和方差的讨论，可得下述结果。
\begin{eqnarray*}
\hat{\beta}_0 \sim N(\beta_0, \text{var}(\hat{\beta}_0)), \,\,\,  \hat{\beta}_1 \sim N(\beta_1, \text{var}(\hat{\beta}_1)).
\end{eqnarray*}
其中的 $\text{var}(\hat{\beta}_0)$ 和 $\text{var}(\hat{\beta}_1)$ 请见前面两页。

\end{itemize}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.3.6. 高斯-马尔可夫条件}

\begin{itemize}
\item {\color{red}问题：如何理解高斯-马尔可夫条件？}

\item 解答：
\begin{itemize}
\item 高斯-马尔可夫条件是指回归模型的误差项 $\{\varepsilon_i,1\le i\le n\}$ 的均值为零，方差相同，且两两不相关。
\item 用数学语言来描述就是，对 $1\le i,j\le n$, 其中 $i\neq j$, 成立：
{\color{red}\[ \boxed{\text{E}(\varepsilon_i)=0,\,\, \text{var}(\varepsilon_i)=\sigma^2,\,\, \text{cov}(\varepsilon_i,\varepsilon_j)=0}.\] }

\vspace{-0.4cm}

\item 在高斯-马尔可夫条件下，参数的最小二乘估计是{\color{red}最小方差线性无偏估计} (BLUE)，即在一切线性无偏估计中，这个估计量的方差最小。
\item 误差项服从独立同分布的正态分布 $N(0,\sigma^2)$ 这个条件要强于高斯-马尔可夫条件。
\end{itemize}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.3.7. 预测}

\begin{itemize}
\item {\color{red}问题：设有一元线性回归模型 $y=\beta_0+\beta_1x+\varepsilon$, 从样本数据 $\{(x_i,y_i),\,\, 1\le i\le n\}$ 求出了参数的最小二乘估计，得到了经验回归方程 $\hat{y}=\hat{\beta}_0+\hat{\beta}_1x$. 设现在有一个新的观测，自变量 $x$ 取值为 $x_0$. 求对因变量 $y$ 的预测 $\hat{y}=\hat{\beta}_0+\hat{\beta}_1x_0$ 的分布。}

\item 解答：
\begin{itemize}
\item 设误差服从正态分布。则 $\hat{y}=\hat{\beta}_0+\hat{\beta}_1x_0$ 是一些正态分布的随机变量的线性组合，因此也服从正态分布。因此 $\hat{y} \sim N(\text{E}(\hat{y}), \text{var}(\hat{y}))$. 
\item 计算 $\hat{y}$ 的均值，可得 $\text{E}(\hat{y})=\beta_0+\beta_1x_0$.
\item 计算 $\hat{y}$ 的方差，可得 $\text{var}(\hat{y})=\left[ \frac{1}{n}+\frac{(x_0-\bar{x})^2}{L_{xx}} \right]\sigma^2$. 细节如下：
\begin{eqnarray*}
\text{var}(\hat{y}) &=& \text{cov}(\hat{y},\hat{y})= \text{cov}(\hat{\beta}_0+\hat{\beta}_1x_0,\hat{\beta}_0+\hat{\beta}_1x_0) \\
&=& \text{cov}(\hat{\beta}_0,\hat{\beta}_0) +\text{cov}(\hat{\beta}_1,\hat{\beta}_1)x_0^2 + 2\text{cov}(\hat{\beta}_0,\hat{\beta}_1)x_0.
\end{eqnarray*}

\end{itemize}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.3.8. 思考题1}

\begin{itemize}
\item {\color{red}问题：考虑一元线性回归模型 $y=\beta_0+\beta_1x+\varepsilon$, 如何理解参数 $\beta_0,\beta_1$ 的最小二乘估计
$\hat{\beta_0},\hat{\beta}_1$ 是随机变量这件事情？}

\item 解答：


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.3.9. 思考题2}

\begin{itemize}
\item {\color{red}问题：考虑一元线性回归模型 $y=\beta_0+\beta_1x+\varepsilon$, （默认假设是误差项服从高斯-马尔可夫条件）。计算参数 $\beta_0$ 的最小二乘估计 $\hat{\beta}_0$ 的均值和方差。}

\item 解答：


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.3.10. 思考题3}

\begin{itemize}
\item {\color{red}问题：考虑一元线性回归模型 $y=\beta_0+\beta_1x+\varepsilon$, （默认假设是误差项服从高斯-马尔可夫条件）。计算参数的最小二乘估计 $\hat{\beta}_0$ 和 $\hat{\beta}_1$ 的协方差。}

\item 解答：


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.4.1. 为什么需要检验回归方程的显著性}

\begin{itemize}
\item {\color{red}问题：为什么需要检验回归方程的显著性？}

\item 解答：
\begin{itemize}
\item 变量 $y$ 和 $x$ 之间的统计规律性还没有验证。谁知道经验回归方程 $\hat{y}=\hat{\beta}_0+\hat{\beta}_1x$ 是否真的反映了这两个变量之间的统计规律呢？
\item 误差项是否服从正态分布也没有验证。%基本假设
%\item 
%\item 
\end{itemize}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.4.2. 检验回归系数的显著性}

\begin{itemize}
\item {\color{red}问题：在线性回归模型 $y=\beta_0+\beta_1x+\varepsilon$ 中，系数 $\beta_1$ 是否显著不等于零？换句话说，在影响变量 $y$ 的因素中，变量 $x$ 是否显著？}

\item 解答：
\begin{itemize}
\item 首先写出假设检验 \( H_0: \beta_1=0, \text{  vs.  } H_1:\beta_1\neq 0.\) 就是零假设认为不显著。
\item 在正态误差的假设下，估计量 $\hat{\beta}_1$ 服从正态分布 \(\hat{\beta}_1\sim N(\beta_1,\sigma^2/L_{xx}) . \)

\item 当 $H_0$ 为真时，$\hat{\beta}_1\sim N(0, \sigma^2/L_{xx})$. 因此构造统计量，并求出其分布
\[ T= \frac{\hat{\beta}_1}{\sqrt{\hat{\sigma}^2/L_{xx}}} = \frac{\hat{\beta}_1}{\hat{\sigma}/\sqrt{L_{xx} } } \sim t(n-2),
\,\, \text{ 其中 } \,\, \hat{\sigma}^2 = \frac{1}{n-2}\sum\limits_{i=1}^{n} (y_i-\hat{y}_i)^2. \] 

\item 代入样本数据，当 $T$ 统计量的统计值 $t$ 的绝对值 $|t|$ 很大时，例如 $|t|\ge t_{0.05}(n-2)$, 就拒绝零假设，认为参数 $\beta_1$ 显著不等于零，即自变量 $x_1$ 是显著的。

\end{itemize}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.4.3. 检验回归系数的显著性（一些注解）}

\begin{itemize}
\item 误差项的方差 $\sigma^2$ 是未知参数，所以用样本数据代入，得到估计值 $\hat{\sigma}^2$. 这样构造的统计量一般是 $t$ 分布。%如果方差 $\sigma^2$ 已知的话，构造的统计量一般是正态分布。

\item 代入样本数据，得到统计量 $T$ 的统计值 $t$ 之后，有两种方法确定是否拒绝零假设（这里假设显著性水平 $\alpha=0.1$）：
\begin{enumerate}
\item 看 $t$ 是否落在拒绝域 $\{t: |t|\ge t_{0.05}(n-2)\}$. 若落入则拒绝零假设。
\item 计算 $p$ 值 $p=\text{P}(|T|\ge |t|)$, 若 $p<0.1$ 则拒绝零假设。
\end{enumerate}

\item 这里有关假设检验的知识，可以参考茆诗松等人的《概率论与数理统计教程》第七章。有关 $t$ 统计量的概念，参考第五章。

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.4.4. 例子2-1. 参数的显著性检验}

\begin{itemize}
\item 数据：保险公司为确定火灾保险价格，想要确定居民住宅区的火灾损失 $y$ 与住宅区到消防站的距离 $x$ 之间的关系。搜集到的已有数据如下。

\begin{table}[ht!]\centering
\caption{火灾损失表}\vspace{0.05cm}
\begin{tabular}{ |M{2cm}|M{0.8cm}|M{0.8cm}|M{0.8cm}|M{0.8cm}|M{0.8cm}|M{0.8cm}|M{0.8cm}|M{0.8cm}|} \hline
距离 $x$ &3.4 &1.8 &4.6 &2.3 &3.1 &5.5 &0.7 &3.0 \\  \hline
损失 $y$ &26.2 &17.8 &31.3 &23.1 &27.5 &36.0 &14.1 &22.3 \\  \hline
距离 $x$ &2.6 &4.3 &2.1 &1.1 &6.1 &4.8 &3.8 & \\  \hline
损失 $y$ &19.6 &31.3 &24.0 &17.3 &43.2 &36.4 &26.1 & \\  \hline
\end{tabular}
\end{table}

\item 问题：检验一元线性回归模型 $y=\beta_0+\beta_1x+\varepsilon$ 中的变量 $x$ 是否显著？

\end{itemize}


\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{2.4.5. 例子2-1.参数的显著性检验（R程序）} 

\begin{itemize}
\item 解答：下述代码是按照理论一步步计算。

{\color{blue}
\begin{verbatim}
> rm(list=ls())  #删除工作空间的所有变量，以避免变量重名
> mydata <- read.table('ex2-1-fire-insurance.txt',header=T)
> x <- mydata$x  #这个向量保存了15个距离数据
> y <- mydata$y  #这个向量保存了15个损失数据
> xbar <- mean(x)  #计算均值
> ybar <- mean(y)  #计算均值
> Lxx <- sum((x-xbar)^2)  #计算基本统计量
> Lyy <- sum((y-ybar)^2)  #计算基本统计量
> Lxy <- sum((x-xbar)*(y-ybar))  #计算基本统计量
> beta1hat <- Lxy/Lxx  #计算参数 beta_1 的估计值
> beta0hat <- ybar - beta1hat*xbar #计算参数 beta_0 的估计值
> yhat <- beta0hat + beta1hat * x  #计算回归值
\end{verbatim}
}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{2.4.6. 例子2-1.参数的显著性检验（R程序，续）} 

\begin{itemize}
\item 解答：接着上一页的计算。

{\color{blue}
\begin{verbatim}
> e <- y-yhat  #计算残差值
> n <- nrow(mydata)  #计算样本容量，我们已知是15个
> sigma2hat <- sum(e^2)/(n-2)  #计算误差方差的估计值
> t <- beta1hat*sqrt(Lxx)/sqrt(sigma2hat)  #计算T统计量的值
> t005 <- -qt(0.05,n-2)  #计算T统计量的0.05上分位数
> pvalue <- 2*(1-pt(t,n-2))  #计算p值
> options(digits=12)  #设置显示有效数字12位
> options(scipen=999)  # 不用科学计数法显示小数
> t  #显示t值
> pvalue  #显示p值
\end{verbatim}
}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{2.4.7. 例子2-1.参数的显著性检验（解释计算的结果）} 

\begin{itemize}
\item 设显著性水平 $\alpha=0.1$, 参数估计值，检验统计量的值如下表， 

\begin{table}[ht!]\centering
%\caption{自变量显著性检验($\alpha=0.1$)}\vspace{0.05cm}
\begin{tabular}{ |M{2cm}|M{4cm}|M{5cm}|} \hline
$\hat{\beta}_1$ & 自变量的系数估计 & 4.91933072677  \\ \hline
$\hat{\sigma}^2$ & 误差方差的估计 & 5.36545964251  \\ \hline
$t$ & 检验的t值 & 12.5254205372  \\ \hline
$t_{0.05}(13)$ & 临界的t值 & 1.77093339599  \\ \hline
$p$ & 检验的p值 & 0.0000000124779999666  \\ \hline
\end{tabular}
\end{table}

\item 可以看到，检验统计量的 $t$ 值超过了临界值，检验的 $p$ 值也小于显著性水平 $\alpha=0.1$. 因此拒绝原假设，认为参数 $\beta_1$ 在模型中是显著的。

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{2.4.8. 例子2-1.参数的显著性检验（直接调用回归函数）} 

\begin{itemize}
\item 显著性检验也可以直接从 \,{\color{blue}\verb+summary(lm(y~x))+} 函数的结果读出。
{\footnotesize\color{blue}
\begin{verbatim}
> lm01 <- lm(y~x)  #换种方法，直接调用回归函数
> summary(lm01)  #查看回归结果
... ...
Coefficients:
                Estimate   Std. Error  t value       Pr(>|t|)    
(Intercept) 10.277928550  1.420277811  7.23656 0.000006585564 ***
x            4.919330727  0.392747749 12.52542 0.000000012478 ***
... ...
\end{verbatim}
}

\item 在上述输出结果中，最后一行的四个数字，分别是自变量的系数这个参数的估计值 $\hat{\beta}_1$, 该参数的标准误 $\hat{\sigma}/\sqrt{L_{xx}}$, 检验统计量 $T=\frac{\hat{\beta}_1}{\hat{\sigma}/\sqrt{L_{xx}} } $ 的统计值 $t$, 以及检验的 $p$ 值 $p=\text{P}(|T|\ge |t|)$.

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.4.9. 回归方程的显著性检验（方差分析）}

\begin{itemize}
\item {\color{red}问题：一元线性回归模型 $\hat{y}=\hat{\beta}_0+\hat{\beta}_1x$ 的整体表现是否显著？}

\item 解答：检验的基本思想是方差分析。考虑平方和分解式
\begin{equation*}
\begin{array}{ccccc}
\sum\limits_{i=1}^{n}(y_i-\bar{y})^2 &=& \sum\limits_{i=1}^{n}(\hat{y}_i-\bar{y})^2 &+& \sum\limits_{i=1}^{n}(y_i-\hat{y}_i)^2, \\
SST &=& SSR &+& SSE.
\end{array}
\end{equation*}

\item 思路：如果 $SSR$ 占了 $SST$ 的绝大部分，那么认为这个模型是成功的：
\begin{itemize}
\item 总离差平方和 $SST$ 反映了因变量 $y$ 的不确定性。
\item 回归平方和 $SSR$ 反映了模型所能解释的因变量 $y$ 的不确定性。
\item 残差平方和 $SSE$ 反映了模型之外的因素引起的因变量 $y$ 的不确定性。
\end{itemize}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.4.10.a. 回归方程的显著性检验（方差分析，续）}

\begin{itemize}
\item 构造下述检验统计量，并求出当 $H_0: \beta_1=0$ 成立时所服从的分布，
\[ F=\frac{SSR/1}{SSE/(n-2)} \sim F(1,n-2).\]

\item 设显著性水平为 $\alpha$, 代入样本数据，当统计量 $F$ 的值大于临界值 $F_{\alpha}(1,n-2)$ 时，拒绝零假设，认为回归方程是显著的。方差分析的数据可以很整齐地放在一张{\color{red}方差分析表}里。

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.4.10.b. 回归方程的显著性检验（方差分析，续）}

%\begin{itemize}

{\footnotesize
\begin{table}[ht!]\centering
\caption{方差分析表}\vspace{0.1cm}
\begin{tabular}{ |M{1.6cm}|M{1.2cm}|M{1.2cm}|M{1.9cm}|M{2cm}|M{2cm}|} \hline
方差来源 & 自由度 & 平方和 & 均方 & F 值 & p值 \\  \hline 
回归 & $1$ & $SSR$ & $SSR/1$ & $f=\frac{SSR/1}{SSE/(n-2)}$ & $\text{P}(F>f)$ \\  \hline 
残差 & $n-2$ & $SSE$ & ${SSE}/{(n-2)}$ & & \\  \hline 
总和 & $n-1$ & $SST$ & & & \\  \hline 
\end{tabular}
\end{table}
}

%\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{2.4.11. 例子2-1.模型的显著性检验} 

\begin{itemize}
\item {\color{red}问题：对例2-1的线性回归模型 $\hat{y}=\hat{\beta}_0+\hat{\beta}_1x$ 进行方差分析。}

\item 解答：可以按定义计算，也可以使用现成的R函数进行计算。

{\footnotesize\color{blue}
\begin{verbatim}
rm(list=ls())  #从零开始
mydata <- read.table('ex2-1-fire-insurance.txt',header=T)
x <- mydata$x
y <- mydata$y
lm01 <- lm(y~x, data=mydata)  #计算线性回归模型
yhat <- lm01$fitted.values  #从模型结果提取回归值
ybar <- mean(y)
SST <- sum((y-ybar)^2)  #按定义计算总离差平方和
SSR <- sum((yhat-ybar)^2)  #按定义计算回归平方和
SSE <- sum((y-yhat)^2)  #按定义计算残差平方和
n <- nrow(mydata)
f <- SSR/(SSE/(n-2))  #按定义计算F统计量的值
\end{verbatim}
}

%\item 

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{2.4.12. 例子2-1.模型的显著性检验（续）} 

\begin{itemize}
\item 解答：也可以直接使用 \,{\color{blue}\verb+anova()+} 函数得到方差分析表。
{\footnotesize\color{blue}
\begin{verbatim}
> anova(lm01)
Analysis of Variance Table

Response: y
          Df      Sum Sq     Mean Sq   F value         Pr(>F)    
x          1 841.7663580 841.7663580 156.88616 0.000000012478 ***
Residuals 13  69.7509754   5.3654596                             
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
\end{verbatim}
}

\item 结果表明，$F$ 统计量的 $p$ 值远远小于显著性水平，所以这个线性回归模型是非常显著的。

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.4.13. 相关系数的显著性检验}

\begin{itemize}
\item 定义：设两个变量 $(x,y)$ 有样本 $\{(x_i,y_i),1\le i\le n\}$, 则定义它们的{\color{red}简单相关系数}
（也称{\color{red}样本相关系数}，简称{\color{red}相关系数}）为 
\[ r = \frac{\sum\limits_{i=1}^{n}(x_i-\bar{x})(y_i-\bar{y}) }{\sqrt{\sum\limits_{i=1}^{n}(x_i-\bar{x})^2} \sqrt{\sum\limits_{i=1}^{n}(y_i-\bar{y})^2} } = \frac{L_{xy}}{\sqrt{L_{xx}} \sqrt{L_{yy}} }. \]

\item 性质：简单相关系数的取值总在区间 $[-1,1]$ 上。可证下述结论：
\begin{itemize}
\item 完全负相关：$r=-1$ 当且仅当存在常数 $k<0$ 使得 $y_i=kx_i, 1\le i\le n$. 
\item 完全正相关：$r=1$ 当且仅当存在常数 $k>0$ 使得 $y_i=kx_i, 1\le i\le n$. 
\end{itemize}

\item 简单相关系数描述了变量之间的线性关系，不能反映其它函数关系。

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.4.14. 相关系数的显著性检验（t统计量）}

\begin{itemize}
\item {\color{red}问题：如何使用 $t$ 统计量来检验两个变量之间的简单相关系数是否显著不等于零？}

\item 解答：
\begin{itemize}
\item 零假设和备选假设为 $H_0: r=0, \text{  vs.  }H_1:r\neq 0$.
\item 当零假设为真时，统计量 $T=\frac{\sqrt{n-2}r}{\sqrt{1-r^2}}$ 服从 $t(n-2)$ 分布。
\item 如果统计值 $t$ 的绝对值大于临界值 $t_{\alpha/2}(n-2)$, 那么拒绝零假设，认为简单相关系数显著不等于零。
\end{itemize}

\item 一些术语：
\begin{itemize}
\item 高度显著的线性关系：如果相关系数非常接近 $\pm 1$, 绝对值超过临界值(需查表)。
\item 没有明显的线性关系：如果相关系数非常接近 $0$, 绝对值未超过临界值(需查表)。
\end{itemize}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{2.4.15. 例子2-1. 变量之间的相关系数检验} 

\begin{itemize}
\item {\color{red}问题：检验自变量和因变量之间的简单相关系数是否显著不等于零。}

\item 解答一：按定义、或使用R函数 \,{\color{blue}\verb+cor()+} 计算相关系数的值，然后查表找到临界值，进行判断。

{\footnotesize\color{blue}
\begin{verbatim}
> cor(x,y,method='pearson')
[1] 0.960977715132
\end{verbatim}
}

\item 查表（262页《简单相关系数临界值表》），当$n-2=13$ 时，5\% 和 1\% 的临界值分别为 0.514 和 0.641. 
由于现在的 $r=0.961>0.641$, 因此认为变量之间有高度显著的线性关系。

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{2.4.15. 例子2-1. 变量之间的相关系数检验（续）} 

\begin{itemize}
\item 解答二：使用相关系数检验的R函数 \,{\color{blue}\verb+cor.test()+} 直接得出回答。
{\footnotesize\color{blue}
\begin{verbatim}
> cor.test(x,y,method='pearson',conf.level=0.99)
	Pearson's product-moment correlation
data:  x and y
t = 12.52542054, df = 13, p-value = 0.0000000124780001
alternative hypothesis: true correlation is not equal to 0
99 percent confidence interval:
 0.838159653886 0.991045164426
sample estimates:
           cor 
0.960977715132 
\end{verbatim}
}

\item 可以看到 $T$ 统计量的值，还有 $p$ 值等。因为 $p$ 值远远小于显著性水平（设 $\alpha=0.01$），所以拒绝零假设，认为
\,{\color{blue}\text{true correlation is not equal to 0}}, 即简单相关系数显著不等于零。

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.4.17. 思考题1}

\begin{itemize}
\item {\color{red}问题：一元线性回归模型 $y=\beta_0+\beta_1x+\varepsilon$ 的回归系数 $\beta_1$ 是否显著不等于零？
从数理统计的角度来回答这个问题，是怎样思考的？}

\item 解答：


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.4.18. 思考题2}

\begin{itemize}
\item {\color{red}问题：一元线性回归模型 $y=\beta_0+\beta_1x+\varepsilon$ 是否显著？怎样理解这个问题本身，以及方差分析的解决思路？}

\item 解答：


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.5.1. 残差}

\begin{itemize}
\item {\color{red}问题：什么是残差？残差与误差的区别是什么？}

\item 解答：
\begin{itemize}
\item 在使用样本数据 $\{(x_i,y_i),1\le i\le n\}$ 得到经验回归模型 $\hat{y}=\hat{\beta}_0+\hat{\beta}_1x$ 之后，就得到了因变量的回归值 $\hat{y}_i=\hat{\beta}_0+\hat{\beta}_1x_i$. {\color{red}残差}就是因变量的观测值与回归值的差：$e_i=y_i-\hat{y}_i, 1\le i\le n$. 

\item 在应用回归分析的术语中，{\color{red}误差}指的是理论回归模型 $y=\beta_0+\beta_1x+\varepsilon$ 中的随机变量 $\varepsilon$. 样本回归模型 $y_i=\beta_0+\beta_1x_i+\varepsilon_i$ 中的 $\{\varepsilon_i \,\,(1\le i\le n)\}$ 也称为误差，这些也是随机变量，基本假设（高斯-马尔可夫条件）要求它们的均值为零，方差相同，两两不相关。
\item 残差 $\{e_i \,\,(1\le i\le n)\}$ 是样本数据 $\{(x_i,y_i)\,\,(1\le i\le n)\}$ 的函数，相互之间是不独立的。一个最明显的关系式就是
$e_1+e_2+\cdots+e_n=0$. %\[ \sum\limits_{i=1}^{n} e_i=0. \] 

\item 在样本数据 $\{(x_i,y_i)\}$ 代入观测值之前，残差 $\{e_i \}$ 也是随机变量。

\end{itemize}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.5.2. 残差图}

\begin{itemize}
\item {\color{red}问题：什么是残差图？}

\item 解答：把残差 $e_i$ 画在纵坐标上，把自变量 $x_i$ （或数据序号 $i$, 或回归值 $\hat{y}_i$）画在横坐标上，得到的散点图就称为残差图。 

\item 残差还有其它几种变形：标准化残差、学生化残差、和删除残差、删除学生化残差等。

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.5.3. 残差的性质1}

\begin{itemize}
\item {\color{red}性质1：残差的数学期望等于零。}

\item 证明：$\text{E}(e_i)=\text{E}(y_i-\hat{y}_i)=\text{E}(y_i)-\text{E}(\hat{y}_i)=(\beta_0+\beta_1x_i)-(\beta_0+\beta_1x_i)=0$.

\item 解释：
\begin{itemize}
\item 第一个等号是残差的定义。
\item 第二个等号是数学期望的性质。
\item 第三个等号第一部分 $\text{E}(y_i)=\beta_0+\beta_1x_i$ 是因为基本假设要求 $\text{E}(\varepsilon_i)=0$.
\item 第三个等号第二部分 $\text{E}(\hat{y}_i)=\text{E}(\hat{\beta}_0+\hat{\beta}_1x_i)= \text{E}(\hat{\beta}_0)+\text{E}(\hat{\beta}_1)x_i=\beta_0+\beta_1x_i$.  \\ 这里用到了两个参数的最小二乘估计的无偏性质。

\end{itemize}
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.5.4. 残差的性质2}

\begin{itemize}
\item {\color{red}性质2：残差的方差有如下公式：
 \[ \text{var}(e_i) = \left[ 1-\frac{1}{n} - \frac{(x_i-\bar{x})^2}{L_{xx}} \right] \sigma^2 = (1-h_{ii})\sigma^2. \] }

\item 证明：主要思路是根据残差的定义和协方差的线性性质，
\begin{eqnarray*}
\text{var}(e_i) &=& \text{cov}(e_i,e_i) = \text{cov}(y_i-\hat{\beta}_0-\hat{\beta}_1x_i, y_i-\hat{\beta}_0-\hat{\beta}_1x_i) \\
&=& \text{cov}(y_i,y_i)+ \text{cov}(\hat{\beta}_0,\hat{\beta}_0)+\text{cov}(\hat{\beta}_1,\hat{\beta}_1)x_i^2 \\
&& - 2\text{cov}(y_i,\hat{\beta}_0) - 2\text{cov}(y_i,\hat{\beta}_1)x_i + 2\text{cov}(\hat{\beta}_0,\hat{\beta}_1)x_i \\
&=& \sigma^2 + \cdots \text{ the space is limited here ...} 
\end{eqnarray*}

\item 注解：在第三章多元线性回归模型里，使用矩阵形式进行计算，我们将会有更简洁的证明。
不过这里的计算对于学习数理统计很有帮助。

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.5.5. 残差的性质3}

\begin{itemize}
\item {\color{red}性质3：残差满足约束条件：
\begin{eqnarray*}
e_1+e_2+\cdots+e_n &=& 0, \\
x_1e_1+x_2e_2+\cdots+x_ne_n &=& 0.
\end{eqnarray*}
}

\item 注解：这个性质说明，残差向量 $(e_1,\cdots,e_n)$ 与全壹向量 $(1,\cdots,1)$ 互相垂直，与自变量数据向量 $(x_1,\cdots,x_n)$ 也互相垂直。

\item 证明：

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.5.6. 残差的性质3（几何解释）}

\begin{itemize}
\item {\color{red}定理：最小二乘法是欧氏空间 $\mathbb{R}^n$ 中因变量的数据向量 $(y_1,\cdots,y_n)$ 向全壹向量 $(1,\cdots,1)$ 与自变量的数据向量 $(x_1,\cdots,x_n)$ 生成的二维子空间的正交投影。}


\item 任务：将经验回归方程 $\hat{y}_i=\hat{\beta}_0+\hat{\beta}_1x_i\, (1\le i\le n)$ 用图像来加以说明。

\item 图像：

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.5.7. 改进的残差}

\begin{itemize}
\item 定义：标准化残差 $zre_i$ 和学生化残差 $sre_i$ 分别是指
{\color{red}\[ \boxed{zre_i = \frac{e_i}{\hat{\sigma}} },\quad \boxed{sre_i = \frac{e_i}{\hat{\sigma}\sqrt{1-h_{ii}}} }. \]  }

\item 问题：将残差标准化的好处是什么？

\item 解答：
\begin{itemize}
\item 标准化残差 $zre$ 是一个没有量纲的值，比较起来比较方便。一般认为 $|zre|>3$ 的观测值是异常值。
\item 学生化残差 $sre$ 也是一个没有量纲的值，而且因为 $e_i$ 的方差是 $\sigma^2(1-h_{ii})$, 所以学生化残差还统一了各个观测的方差。我们更常用 $|sre|>3$ 来判断一个观测值是不是异常值。
\end{itemize}
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{2.5.8. 例子2-1的残差、标准化残差和学生化残差} 

\begin{itemize}
\item {\color{red}问题：计算例子2-1中的一元线性回归模型的三种残差，并画出残差图。}

\item 解答：

{\color{blue}
\begin{verbatim}
rm(list=ls())  #删除工作空间的所有变量
mydata <- read.table('ex2-1-fire-insurance.txt',header=T)
lm01 <- lm(y~x, data=mydata)  # 得到回归模型的结果
e <- resid(lm01)   #从回归结果中提取残差
e2<-lm01$residuals  #从回归结果中提取残差的另一种方法
s<-summary(lm01)  #将回归结果的摘要存为一个变量
sigmahat<- s$sigma  #从摘要提取误差项的标准差的估计
zre<-e/s$sigma  #计算标准化残差
sre<-rstandard(lm01)  #计算学生化残差
\end{verbatim}
}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{2.5.9. 例子2-1的残差、标准化残差和学生化残差} 

\begin{itemize}
\item {\color{red}问题：画出例子2-1中的三种残差的图像。}

\item 解答：

{\color{blue}
\begin{verbatim}
par(mfrow=c(2,2))  #准备画四个子图，排成两行两列
plot(mydata$x,e)  #自变量为横坐标，残差为纵坐标
plot(e)  #数据序号为横坐标，残差为纵坐标
plot(zre)  #标准化残差
plot(sre)  #学生化残差
par(mfrow=c(1,1))
\end{verbatim}
}

\item 观察最后一个残差图，可以看到学生化残差的绝对值都不超过1.5, 因此没有异常值。

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.5.10. 例子2-1的四个残差图}

\begin{center}
\includegraphics[height=0.7\textheight, width=0.7\textwidth]{ex2-1-fire-insurance-residuals.png}
\end{center}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.5.11. 思考题1}

\begin{itemize}
\item {\color{red}问题：在回归分析中，误差和残差的区别是什么？在代入样本数据之前，残差 $e_i$ 是个随机变量，这个随机变量是哪些随机变量的函数？}

\item 解答：


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.5.12. 思考题2}

\begin{itemize}
\item  {\color{red}问题：使用矩阵的符号，证明残差的性质3:
\begin{eqnarray*}
e_1+e_2+\cdots+e_n &=& 0, \\
x_1e_1+x_2e_2+\cdots+x_ne_n &=& 0.
\end{eqnarray*}
}

\item 解答：


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.6.1. 什么是回归系数的区间估计}

\begin{itemize}
\item {\color{red}问题：解释参数的点估计和区间估计的区别与联系。}

\item 解答：（参考《数理统计》参数估计那一章。）
\begin{itemize}
\item 用最小二乘法得到的 $\hat{\beta}_0,\hat{\beta}_1$ 是参数 $\beta_0,\beta_1$ 的{\color{red}点估计}，也就是说，代入样本数据 $\{(x_i,y_i)\}$ 之后，能够得到这两个参数的估计值。
\item {\color{red}区间估计}是说，在给定{\color{red}显著性水平} $\alpha$ 之后，我们想为参数 $\beta_0,\beta_1$ 的分别找一个取值区间，参数的真值会以 $1-\alpha$ 的概率落在这个区间内。我们称 $1-\alpha$ 为{\color{red}置信水平}或{\color{red}置信度}，一般取 $\alpha=0.1$, $0.05$, 或 $0.01$, 相应的置信水平就是 $0.9$, $0.95$, 和 $0.99$.
\item 区间估计一般是以点估计为中心的一个区间。在同样的置信水平 $1-\alpha$ 下，区间估计的长度越短，说明这个点估计（区间的中间点）越精确。
\item 为给出区间估计，我们经常假设误差项的分布是正态分布。这样我们就能有参数估计量的分布，从而可以构造置信区间。
\end{itemize}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.6.2. 参数 $\beta_1$ 的区间估计}

\begin{itemize}
\item {\color{red}问题：在正态误差的假设下，求一元线性回归模型 $y_i=\beta_0+\beta_1x_i+\varepsilon_i$ 的参数 $\beta_1$ 的区间估计。}

\item 解答：
\begin{enumerate}
\item[1.] 首先回顾参数的最小二乘估计量为 \( \hat{\beta}_1 = \frac{L_{xy}}{L_{xx}} =
\frac{ \sum\limits_{i=1}^{n}(x_i-\bar{x})(y_i-\bar{y}) }{ \sum\limits_{i=1}^{n}(x_i-\bar{x})^2 } \).
\item[2.] 假设正态误差，即 $\varepsilon_i\overset{iid}{\sim} N(0,\sigma^2)$, 这个估计量服从正态分布
\[ \hat{\beta}_1\sim N(\beta_1, \frac{\sigma^2}{L_{xx}} ). \] 
\item[3.] 因为正态误差的方差 $\sigma^2$ 未知，用其无偏估计 $\hat{\sigma}^2$ 代替，我们构造下述 $t$ 统计量，并求出它的分布，
\[ T=\frac{\hat{\beta}_1-\beta_1}{\sqrt{{\hat{\sigma}^2}/L_{xx} } } \sim t(n-2).\]

\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.6.3. 参数 $\beta_1$ 的区间估计（续）}

\begin{itemize}
\item 解答：（续）
\begin{enumerate}
\item[4.] 画出自由度为 $n-2$ 的 $t$ 分布的概率密度函数，记 $t_{\alpha/2}(n-2)$ 是这个 $t(n-2)$ 分布的上 ${\alpha}/{2}$ 分位数， 则有
\[ \text{P} \left(  - t_{\alpha/2}(n-2) < \frac{\hat{\beta}_1-\beta_1}{\sqrt{{\hat{\sigma}^2}/L_{xx} } } < t_{\alpha/2}(n-2) \right) = 1-\alpha. \]

\item[5.]  从上式的小括号中，解出 $\beta_1$, 即得到这个参数的区间估计，
{\color{red}
\[ \boxed{ 
\left[ \hat{\beta}_1- t_{\alpha/2} \cdot\sqrt{{\hat{\sigma}^2}/L_{xx} },\quad  \hat{\beta}_1+ t_{\alpha/2} \cdot\sqrt{{\hat{\sigma}^2}/L_{xx} } \right]
}. \]
}
这里为简单起见，将分位数 $t_{\alpha/2}(n-2)$ 记为 $t_{\alpha/2}$. 
\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{2.6.4. 例子2-1的回归系数的区间估计} 

\begin{itemize}
\item {\color{red}问题：求例子2-1的参数 $\beta_1$ 的区间估计。设置信水平 $1-\alpha=0.9$. }

\item 解答：先做出回归模型，然后使用 \,{\color{blue}\verb+confint()+}函数得到置信区间为
\[ 4.22\le {\beta}_1 \le 5.61. \]

\vspace{-0.4cm}

{\footnotesize\color{blue}
\begin{verbatim}
> rm(list=ls())
> mydata<-read.table('ex2-1-fire-insurance.txt',head=T)
> lm01<-lm(y~x,data=mydata)  #计算一元线性回归模型
> confint(lm01)  #计算置信区间，默认置信水平 95%
               2.5 %    97.5 %
(Intercept) 7.209605 13.346252
x           4.070851  5.767811
> confint(lm01,level=0.9)  #计算置信区间，指定置信水平 90%
                 5 %      95 %
(Intercept) 7.762711 12.793146
x           4.223801  5.614861
\end{verbatim}
}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.6.5. 思考题1}

\begin{itemize}
\item {\color{red}问题：在一元线性回归模型 $y=\beta_0+\beta_1x+\varepsilon$ 中，为得到参数 $\beta_1$ 的置信区间，我们构造的统计量是什么？}

\item 解答：


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.6.6. }

\begin{itemize}
\item {\color{red}问题：什么是自由度为 $m$ 的 $t$ 分布？}

\item 解答：


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.7.1. 单值预测}

\begin{itemize}
\item {\color{red}问题：研究某地区的小麦亩产量 $y$ 与施肥量 $x$ 的关系的时候，在 $n$ 块面积为1亩的土地上各施肥 $x_i$ 公斤，测得产量 $y_i$ 公斤。于是得到样本数据 $(x_i,y_i), \,\,1\le i \le n$. 现某农户在1亩土地上施肥 $x_0$ 公斤，请问预期产量是多少？}

\item 解答分两步：
\begin{enumerate}
\item 代入样本数据，得到经验回归模型 $\hat{y}=\hat{\beta}_0+\hat{\beta}_1x$.
\item 代入自变量的新的观测值 $x_0$, 得到预测值 $\hat{y}_0=\hat{\beta}_0+\hat{\beta}_1x_0$.
\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.7.2. 因变量新值的预测区间}

\begin{itemize}
\item {\color{red}问题：已知样本数据 $(x_i,y_i), \,\,1\le i \le n$, 已知一元线性回归模型 $y=\beta_0+\beta_1x+\varepsilon$. 设自变量有一个新的观测值 $x_0$. 求因变量的预测值 $\hat{y}_0$ 的置信区间。}

\item 解答：这里我们需要正态假设，即误差项服从正态分布 $\varepsilon_i\overset{iid}{\sim} N(0,\sigma^2)$. 
\begin{enumerate}
\item[1.] 预测值 $\hat{y}_0=\hat{\beta}_0+\hat{\beta}_1x_0$ 是 $\hat{\beta}_0, \hat{\beta}_1$ 的线性函数，而 $\hat{\beta}_0, \hat{\beta}_1$ 是 $y_1, \cdots, y_n$ 的线性函数。根据正态假设，$y_i$ 服从正态分布，又根据回归分析的假设，自变量 $x_i$ 认为不是随机变量。根据相互独立的正态分布的线性组合仍是正态分布，$\hat{y}_0$ 服从正态分布。
\item[2.] 根据2.3节的计算，$\hat{\beta}_0, \hat{\beta}_1$ 是 $\beta_0,\beta_1$ 的无偏估计，所以 $\hat{y}_0$ 的数学期望为 
{\color{red}\[ \boxed{ \text{E}(\hat{y}_0) = \beta_0+\beta_1x_0 }. \] } 
\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.7.3. 因变量新值的预测区间（续）}

\begin{itemize}
\item 解答（续）：
\begin{enumerate}
\item[3.] 主要困难是计算 $\hat{y}_0$ 的方差，具体看下一页。结论为 
{\color{red} \[ \boxed{\text{var}(\hat{y}_0)=h_{00}\sigma^2 }, \quad \text{其中}\quad \boxed{h_{00} = \frac{1}{n}+\frac{(x_0-\bar{x})^2}{L_{xx}} }. \] }
\item[4.] 因为 $y_0$ 与 $y_1,\cdots,y_n$ 相互独立，所以 $y_0$ 与 $\hat{y}_0$ 也相互独立，因此
\[ \text{var}(y_0-\hat{y}_0) = \text{var}(y_0)+\text{var}(\hat{y}_0)=(1+h_{00})\sigma^2.\] 

\item[5.] 从 $y_0-\hat{y}_0\sim N(0,(1+h_{00})\sigma^2)$ 出发，构造下述统计量
\[ t= \frac{y_0-\hat{y}_0}{\sqrt{ (1+h_{00})\hat{\sigma}^2 }} \sim t(n-2). \]

\end{enumerate}
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.7.4. 因变量新值的预测区间（又续）}

\begin{itemize}
\item 解答（又续）：
\begin{enumerate}
\item[6.] 画出 $t$ 分布的概率密度函数，给定置信水平 $1-\alpha$, 写出下式
\[ \text{P}\left( -t_{\alpha/2} \le \frac{y_0-\hat{y}_0}{\sqrt{1+h_{00}} \hat{\sigma} } \le t_{\alpha/2} \right) = 1-\alpha. \]

\item[7.] 从小括号中的两个不等式，可得 $y_0$ 的置信区间
{\color{red} \[ \boxed{ \hat{y}_0 - t_{\alpha/2} \sqrt{1+h_{00}} \hat{\sigma} \le y_0 \le \hat{y}_0 + t_{\alpha/2} \sqrt{1+h_{00}} \hat{\sigma} }. \] }

\vspace{-0.5cm}

\item[8.] 这里为简化记号，将 $t_{\alpha/2}(n-2)$ 简写成了 $t_{\alpha/2}$. 

\end{enumerate}

\item 注解：在统计软件里，当自变量 $x_0$ 取遍所有可能的值，因变量的预测值的置信区间形成所谓的“预测带”，或称“宽带”。
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.7.5. 计算 $\hat{y}_0$ 的方差}

\begin{itemize}
\item {\color{red}问题：计算 $\hat{y}_0=\hat{\beta}_0+\hat{\beta}_1x_0$ 的方差。}
\item 解答：
\begin{enumerate}
\item 首先将 $\hat{y}_0$ 写成 $y_1,\cdots,y_n$ 的线性组合。根据 $\hat{\beta}_0=\bar{y}-\hat{\beta}_1\bar{x}$ 可得 
\begin{eqnarray*}
\hat{y}_0=\hat{\beta}_0+\hat{\beta}_1x_0 = \bar{y}-\hat{\beta}_1\bar{x} + \hat{\beta}_1x_0
=\sum\limits_{i=1}^{n} \left[ \frac{1}{n} + \frac{(x_i-\bar{x})(x_0-\bar{x})}{L_{xx}} \right] y_i.
\end{eqnarray*}

\item 根据 $y_1,\cdots,y_n$ 相互独立，且方差都是 $\sigma^2$, 可得
\begin{eqnarray*}
{\color{red}\text{var}}(\hat{y}_0) = \sum\limits_{i=1}^{n} \left[ \frac{1}{n} + \frac{(x_i-\bar{x})(x_0-\bar{x})}{L_{xx}} \right]^2 {\color{red}\text{var}}(y_i) = \left[ \frac{1}{n} +\frac{(x_0-\bar{x})^2}{L_{xx}} \right]\sigma^2.
\end{eqnarray*}

\item 注解：上式第一个等号是方差的性质，系数提出来要加平方。第二个等号是将每一项平方展开，然后根据 $\sum\limits_{i=1}^{n}(x_i-\bar{x})=0$ 得到。

\end{enumerate}
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.7.6. 因变量新值的平均值的区间预测}

\begin{itemize}
\item {\color{red}问题：已知样本数据 $(x_i,y_i), \,\,1\le i \le n$, 已知一元线性回归模型 $y=\beta_0+\beta_1x+\varepsilon$. 设自变量有一个新的观测值 $x_0$. 求因变量的新值的平均值 $\text{E}({y}_0)$ 的置信区间。}

\item 解答：首先由 $y_0=\beta_0+\beta_1x_0+\varepsilon_0$ 可得 $\text{E}({y}_0)=\beta_0+\beta_1x_0$. 
\begin{enumerate}
\item[1.] 注意到 $\beta_0$ 和 $\beta_1$ 是未知参数，$\text{E}({y}_0)$ 也是一个未知参数。
可知 $\text{E}({y}_0)$ 的点估计仍然是 $\hat{y}_0=\hat{\beta}_0+\hat{\beta}_1x_0$. 

\item[2.] 根据前面课件第2-3页的计算，$\hat{y}_0\sim N(\beta_0+\beta_1x_0, h_{00}\sigma^2)$. 于是有
\[ \hat{y}_0 - \text{E}(y_0) \sim N(0,h_{00}\sigma^2). \]

\item[3.] 构造下述统计量，注意方差 $\sigma^2$ 因为未知，所以由 $\hat{\sigma}^2$ 代替，
\[ t= \frac{\hat{y}_0 - \text{E}(y_0)}{\sqrt{h_{00}}\hat{\sigma} } \sim t(n-2). \]

\end{enumerate}
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.7.7. 因变量新值的平均值的区间预测（续）}

\begin{itemize}
\item 解答（续）：
\begin{enumerate}

\item[4.] 根据同样的分位数，得到概率计算表达式
\[ \text{P}\left( -t_{\alpha/2} \le \frac{\hat{y}_0- \text{E}(y_0)}{\sqrt{h_{00}} \hat{\sigma} } \le t_{\alpha/2} \right) = 1-\alpha. \]

\item[5.] 从小括号中的两个不等式，可得 $y_0$ 的置信区间
{\color{red} \[ \boxed{ \hat{y}_0 - t_{\alpha/2} \sqrt{h_{00}} \hat{\sigma} \le \text{E}(y_0) \le \hat{y}_0 + t_{\alpha/2} \sqrt{h_{00}} \hat{\sigma} }. \] }

\end{enumerate}

\vspace{-0.3cm}

\item 注解：在统计软件里，当自变量 $x_0$ 取遍所有的值，得到的因变量的新值的平均值 $\text{E}(y_0)$ 的置信区间，形成了所谓的“置信带”，或称“窄带”。

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{2.7.8. 例子2-1. 火灾损失的预测区间与置信区间 } 

\begin{itemize}
\item {\color{red}问题：设有一户人家距离消防站距离 $x_0=3.5$ 公里。计算因火灾可能造成的损失 $y_0$ 的预测区间和置信区间。设置信水平 $1-\alpha=0.95$. }

\item 解答：都是使用 \,{\color{blue}\verb+predict()+} 函数。

{\color{blue}
\begin{verbatim}
> newdata <- data.frame(x=3.5)
> ypred <- predict(lm01,newdata,int='p',level=0.95)
> ypred  #参数 int='p' 表明是求预测区间
       fit      lwr      upr
1 27.49559 22.32394 32.66723
> yconf <- predict(lm01,newdata,int='c',level=0.95)
> yconf  #参数 int='c' 表明是求置信区间
       fit     lwr      upr
1 27.49559 26.1901 28.80107
\end{verbatim}
}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{2.7.9. 例子2-1. 火灾损失的预测带与置信带}

\begin{itemize}
\item {\color{red}问题：画出火灾损失模型的预测带和置信带。}

\item 解答：函数 \,{\color{blue}\verb+predict()+} 的第二个参数是自变量的新值，可以一下子放很多值，返回值是相应的很多的预测区间或置信区间。
{\footnotesize\color{blue}
\begin{verbatim}
> x <- mydata$x  #为了后面引用方便起见
> y <- mydata$y  #为了后面引用方便起见
> newx <- data.frame(x=seq(0.5,7,0.25))  #自变量的新值是个等差数列
> ypred <- predict(lm01,newx,int='p')  #计算预测带，p=prediction
> yconf <- predict(lm01,newx,int='c')  #计算置信带，c=confidence
> plot(x,y,xlim=range(x,newx),ylim=range(y,ypred,yconf))
> matlines(newx,ypred,lty=c(1,2,2),col='red')  #在刚才的图上继续画线
> matlines(newx,yconf,lty=c(1,3,3),col='blue')  #在刚才的图上继续画线
\end{verbatim}
}

\item 注解：函数 \,{\color{blue}\verb+range()+} 是计算一些数组的元素的最小值和最大值，这里是确保所有数据都出现在图像中。
参数 \,{\color{blue}\verb+xlim+} 和 \,{\color{blue}\verb+ylim+} 分别指定图像的左右界和上下界。
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.7.10. 例子2-1. 火灾损失的预测带与置信带（图像）}

\begin{center}
\includegraphics[height=0.7\textheight, width=0.9\textwidth]{ex2-1-fire-insurance-prediction.png}
\end{center}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.7.11. 思考题1}

\begin{itemize}
\item {\color{red}问题：当自变量的新值在什么范围内的时候，因变量的新值的预测区间的长度相对较短，也即因变量的点估计较为准确？}

\item 解答：


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{2.7.12. 思考题2}

\begin{itemize}
\item {\color{red}问题：如何理解因变量的新值 $y_0$ 与因变量的新值的期望 $\text{E}(y_0)$ 的差别？}

\item 解答：


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\end{document}



